library(zoo) #Date manipulation package
library(smooth) #For Exponential Smoothing
library(greybox)
library(dplyr)
library(tidyr)
library(leaflet)
library(gencve)
library(plotly)
library(forecast)
library(tseries)
library(x13binary)
library(seasonalview)
setwd("E:/College Notes/GE-FEL DWIADS/Time Series")
#Import Data and Manage Date
Reg8 <-read.csv("Final Regional Data.csv", header=T)
head(Reg8)
Here, one can see the working data that will be used for the analyses and forecasting. The data was obtained from the DOH COVID-19 Tracker website. Description of the Data:
ï..Week corresponds to the week dates seen in the DOH COVID-19 Tracker page;Wname is a simplified identifier for the data points;Confirmed, Death, and Recovered are the numerical data that will be used in the analyses;Provres specifies the region to be studiedLatitude and Longitude column specifies the location of the regional center (can be used in mapping the cases in a map).NReg8<-Reg8[-c(1,64),]
NReg8<-NReg8%>%
mutate(Active = Confirmed - Death - Recovered) %>%
mutate(Active_total = cumsum(Active),
Recovered_total = cumsum(Recovered),
Death_total = cumsum(Death))
NReg8$Wname<-factor(NReg8$Wname,levels=NReg8$Wname)
Note: The data for the visualization of the Moving Average was obtained through the rollmean function of the zoo R package. It has been checked and confirmed to be in agreement with the moving average found in the DOH website.
NCma<-zoo::rollmean(NReg8$Confirmed,k=4,fill=NA)
NReg8%>%
plot_ly(x = ~ Wname,
y = ~ Confirmed,
name = 'Confirmed',
marker=list(color = '#4682b4'),
type = 'bar') %>%
add_trace(x = ~ Wname,
y = ~ NCma,
type = 'scatter',
name = '4-Week Moving Average',
mode='lines',
line=list(color='#FFA500'),
marker=list(color='#FFA500'),
hoverinfo="text",
text=~paste("4-Week Moving Average:",NCma))%>%
layout(title = "Confirmed COVID-19 Cases - Eastern Visayas",
legend = list(x = 0.1, y = 0.9),
yaxis = list(title = "Number of Cases"),
xaxis = list(title = "Source: <a href='https://doh.gov.ph/covid19tracker'>DOH COVID-19 Tracker</a>",tickangle=45))
NDma<-zoo::rollmean(NReg8$Death,k=4,fill=NA)
NReg8%>%
plot_ly(x = ~ Wname,
y = ~ Death,
name = 'Death',
marker=list(color = '#ff0000'),
type = 'bar')%>%
add_trace(x = ~ Wname,
y = ~ NDma,
type = 'scatter',
name = '4-Week Moving Average',
mode='lines',
line=list(color='#FFA500'),
marker=list(color='#FFA500'),
hoverinfo="text",
text=~paste("4-Week Moving Average:",NDma))%>%
layout(title = "COVID-19 Deaths - Eastern Visayas",
legend = list(x = 0.1, y = 0.9),
yaxis = list(title = "Number of Cases"),
xaxis = list(title = "Source: <a href='https://doh.gov.ph/covid19tracker'>DOH COVID-19 Tracker</a>",tickangle=45))
NRma<-zoo::rollmean(NReg8$Recovered,k=4,fill=NA)
NReg8%>%
plot_ly(x = ~ Wname,
y = ~ Recovered,
name = 'Recovered',
marker=list(color = '#3cb371'),
type = 'bar') %>%
add_trace(x = ~ Wname,
y = ~ NRma,
type = 'scatter',
name = '4-Week Moving Average',
mode='lines',
line=list(color='#FFA500'),
marker=list(color='#FFA500'),
hoverinfo="text",
text=~paste("4-Week Moving Average:",NRma))%>%
layout(title = "COVID-19 Recoveries - Eastern Visayas",
legend = list(x = 0.1, y = 0.9),
yaxis = list(title = "Number of Cases"),
xaxis = list(title = "Source: <a href='https://doh.gov.ph/covid19tracker'>DOH COVID-19 Tracker</a>",tickangle=45))
NReg8%>%
plot_ly(x = ~ Wname,
y = ~ Death,
name = 'Death',
marker=list(color = '#ff0000'),
type = 'bar')%>%
add_trace(x = ~ Wname,
y = ~ Recovered,
name = 'Recovered',
marker=list(color = '#3cb371'),
type = 'bar')%>%
layout(title = "COVID-19 Case Turnout - Eastern Visayas",
legend = list(x = 0.1, y = 0.9),
yaxis = list(title = "Number of Cases"),
xaxis = list(title = "Source: <a href='https://doh.gov.ph/covid19tracker'>DOH COVID-19 Tracker</a>",tickangle=45),
barmode='stack')
ModReg8C.ts<-ts(Reg8$Confirmed[2:63],frequency=52,start=c(2020,3,5))
ModReg8C.ts
Time Series:
Start = c(2020, 3)
End = c(2021, 12)
Frequency = 52
[1] 26 13 4 1 2 31 4 3 5 53 7 5 12 96 252 131 102 20 79 121 170 239 328 434 665 455 279 319 286 288
[31] 384 478 523 321 432 484 380 334 272 350 532 730 535 555 724 562 469 477 321 242 265 220 282 182 202 336 281 268 308 486
[61] 431 195
#Exponential Smoothing Using the smooth Package
ModReg8C.ses<-es(ModReg8C.ts,model="ANN",h=12,holdout=FALSE,intervals="parametric",
level=0.95,cfType="MSE")
ModReg8C.des<-es(ModReg8C.ts,model="AAdN",h=12,holdout=FALSE,intervals="parametric",
level=0.95,cfType="MSE")
ModReg8C.hwn<-es(ModReg8C.ts,model="AAN",h=12,holdout=FALSE,intervals="parametric",
level=0.95,cfType="MSE")
ModReg8C.hwa<-es(ModReg8C.ts,model="AAA",h=12,holdout=FALSE,intervals="parametric",
level=0.95,cfType="MSE")
Sorry, but we don't have enough observations for the seasonal model!
Switching to non-seasonal.
ModReg8C.hwm<-es(ModReg8C.ts,model="AAM",h=12,holdout=FALSE,intervals="parametric",
level=0.95,cfType="MSE")
Sorry, but we don't have enough observations for the seasonal model!
Switching to non-seasonal.
ModReg8C.auto<-es(ModReg8C.ts,model="ZZZ",h=12,holdout=FALSE,intervals="parametric",
level=0.95,cfType="MSE")
Sorry, but we don't have enough observations for the seasonal model!
Switching to non-seasonal.
Seasonal models cannot be used due to the lack of observations (data size is just 60+, compared to the backdrop of 52 weeks per year). Therefore, AAA and AAM will not be used. We are left with ANN, AAdN and AAN models.
#ANN means Additive errors, No trend, No seasonality (SES)
ModReg8C.ses
Time elapsed: 0.04 seconds
Model estimated: ETS(ANN)
Persistence vector g:
alpha
1
Initial values were optimised.
Loss function type: likelihood; Loss function value: 375.2523
Error standard deviation: 104.5759
Sample size: 62
Number of estimated parameters: 2
Number of provided parameters: 1
Number of degrees of freedom: 60
Information criteria:
AIC AICc BIC BICc
754.5046 754.7080 758.7589 759.1786
#AAdN means Additive errors, Additive (dampled) trend, No seasonality (DES)
ModReg8C.des
Time elapsed: 0.04 seconds
Model estimated: ETS(AAdN)
Persistence vector g:
alpha beta
1 0
Damping parameter: 0.9647
Initial values were optimised.
Loss function type: likelihood; Loss function value: 375.1587
Error standard deviation: 106.2032
Sample size: 62
Number of estimated parameters: 4
Number of provided parameters: 2
Number of degrees of freedom: 58
Information criteria:
AIC AICc BIC BICc
758.3174 759.0192 766.8260 768.2741
#AAN means Additive errors, Additive trend, No seasonality (HW no seasonality)
ModReg8C.hwn
Time elapsed: 0.03 seconds
Model estimated: ETS(AAN)
Persistence vector g:
alpha beta
1 0
Initial values were optimised.
Loss function type: likelihood; Loss function value: 375.2302
Error standard deviation: 105.4208
Sample size: 62
Number of estimated parameters: 3
Number of provided parameters: 2
Number of degrees of freedom: 59
Information criteria:
AIC AICc BIC BICc
756.4604 756.8742 762.8418 763.6957
#ZZZ means all components are (Z) estimated 21
ModReg8C.auto
Time elapsed: 0.14 seconds
Model estimated: ETS(ANN)
Persistence vector g:
alpha
1
Initial values were optimised.
Loss function type: likelihood; Loss function value: 375.2523
Error standard deviation: 104.5759
Sample size: 62
Number of estimated parameters: 2
Number of provided parameters: 1
Number of degrees of freedom: 60
Information criteria:
AIC AICc BIC BICc
754.5046 754.7080 758.7589 759.1786
It can be seen above that the ETS(ANN) model produces the least Akaike Information Criterion (AIC). It can also be seen that for ModReg8C.auto, its function returned an ETS(ANN) model.
We will choose the best smoothing model by checking the lowest value.
MAPE(ModReg8C.ts,ModReg8C.ses$fitted)
[1] 0.6918303
MAPE(ModReg8C.ts,ModReg8C.des$fitted)
[1] 1.138553
MAPE(ModReg8C.ts,ModReg8C.hwn$fitted)
[1] 0.7899619
MAPE(ModReg8C.ts,ModReg8C.auto$fitted)
[1] 0.6918303
From the results, the Additive, No trend, No seasonality (ANN) model will be used.
forecast(ModReg8C.auto,h=4)
Time Series:
Start = c(2021, 13)
End = c(2021, 16)
Frequency = 52
Point forecast Lower bound (2.5%) Upper bound (97.5%)
2021.231 195 -12.39130 402.3913
2021.250 195 -98.29558 488.2956
2021.269 195 -164.21226 554.2123
2021.288 195 -219.78259 609.7826
plot(forecast(ModReg8C.auto,h=4))
forecast(ModReg8C.auto,h=8)
Time Series:
Start = c(2021, 13)
End = c(2021, 20)
Frequency = 52
Point forecast Lower bound (2.5%) Upper bound (97.5%)
2021.231 195 -12.39130 402.3913
2021.250 195 -98.29558 488.2956
2021.269 195 -164.21226 554.2123
2021.288 195 -219.78259 609.7826
2021.308 195 -268.74104 658.7410
2021.327 195 -313.00285 703.0029
2021.346 195 -353.70579 743.7058
2021.365 195 -391.59117 781.5912
plot(forecast(ModReg8C.auto,h=8))
From the results, a flat horizontal line forecast is predicted. The horizontal line forecast may be caused due to backlogs and missing data. According to an article in Wolters Kluwer, “in the absence of real patterns, the best forecast might just be a straight line…. The error on a straight-line forecast may be higher than the error you see on other forecasts; not because the forecast was poor, but because the historical data is truly random.” For Chase, 2019, flat-line forecasts are generated when a given demand history/data has no detectable trend or seasonality, thus just reflecting the current demand level.
plot.ts(Reg8$Confirmed[2:63],
ylab="Confirmed",
xlab="Date")
datac.model <-ets(ts(Reg8$Confirmed[2:63],frequency=52,start=c(2020,3,11)),model="ZNN")
forecast(datac.model,h=30)
plot(forecast(datac.model,h=30),
main="Forecast on the Number of COVID-19 Cases in Region VIII",
col="red",
xlab="Date",
ylab="Number of Cases")
adf.test(ModReg8C.ts, alternative="stationary")
Augmented Dickey-Fuller Test
data: ModReg8C.ts
Dickey-Fuller = -1.9935, Lag order = 3, p-value = 0.5775
alternative hypothesis: stationary
adf.test(diff(ModReg8C.ts), alternative="stationary")
p-value smaller than printed p-value
Augmented Dickey-Fuller Test
data: diff(ModReg8C.ts)
Dickey-Fuller = -4.4387, Lag order = 3, p-value = 0.01
alternative hypothesis: stationary
The ADF Test result for ModReg8C.ts suggests non-stationarity while the result for diff(ModReg8C.ts) suggests stationarity, hinting that d=1. This is due to the p-value for ModReg8C.ts being higher than \(\alpha = 0.05\) while the p-value for diff(ModReg8C.ts) is lower than \(\alpha\), suggesting the rejection of the null hypothesis of unit root presence (stationarity).
plot(ModReg8C.ts)
acf(ModReg8C.ts)
pacf(ModReg8C.ts)
plot(diff(ModReg8C.ts))
acf(diff(ModReg8C.ts))
pacf(diff(ModReg8C.ts))
arima()conf.arima <- arima(ModReg8C.ts,order=c(0,1,1))
conf.arima
Call:
arima(x = ModReg8C.ts, order = c(0, 1, 1))
Coefficients:
ma1
0.0343
s.e. 0.2238
sigma^2 estimated as 10752: log likelihood = -369.68, aic = 743.37
conf.pred <- forecast(conf.arima, h = 48, level=c(97.5))
conf.pred
plot(conf.pred)
auto.arima()conf.aarima<-auto.arima(ModReg8C.ts,max.order = 12,trace=TRUE,seasonal=FALSE)
Fitting models using approximations to speed things up...
ARIMA(2,1,2) with drift : 736.263
ARIMA(0,1,0) with drift : 734.2717
ARIMA(1,1,0) with drift : 737.4646
ARIMA(0,1,1) with drift : 736.4677
ARIMA(0,1,0) : 732.1761
ARIMA(1,1,1) with drift : 736.3936
Now re-fitting the best model(s) without approximations...
ARIMA(0,1,0) : 741.4594
Best model: ARIMA(0,1,0)
conf.aarima
Series: ModReg8C.ts
ARIMA(0,1,0)
sigma^2 estimated as 10757: log likelihood=-369.7
AIC=741.39 AICc=741.46 BIC=743.5
auto.arima to find p,d, and qautoaic <- auto.arima(Reg8$Confirmed[2:63],ic='aic')
autoaic
Series: Reg8$Confirmed[2:63]
ARIMA(0,1,0)
sigma^2 estimated as 10757: log likelihood=-369.7
AIC=741.39 AICc=741.46 BIC=743.5
autobic <- auto.arima(Reg8$Confirmed[2:63],ic='bic')
autobic
Series: Reg8$Confirmed[2:63]
ARIMA(0,1,0)
sigma^2 estimated as 10757: log likelihood=-369.7
AIC=741.39 AICc=741.46 BIC=743.5
autoaicc <- auto.arima(Reg8$Confirmed[2:63],ic='aicc')
autoaicc
Series: Reg8$Confirmed[2:63]
ARIMA(0,1,0)
sigma^2 estimated as 10757: log likelihood=-369.7
AIC=741.39 AICc=741.46 BIC=743.5
All three ICs suggests a p,d,q order of 0,1,0, which is similar to the auto.arima-suggested model without specifying the IC.
The second method automatically checks the ARIMA with the lowest AIC, BIC, and AICC respectively. All agreed that the lowest was ARIMA(0,1,0) which also coincides with the result of the first method.
conf.pred <- forecast(conf.aarima, h = 48, level=c(97.5))
conf.pred
plot(conf.pred)
Here, we see the ARIMA ETS graph and forecast using ARIMA(0,1,0). Similar to the forecast earlier from exponential smoothing , a horizontal line forecast is seen.
Leaflet R PackageIn creating a simple map plot, one can use the Leaflet package. Leaflet is an “open-source, JavaScript library for interactive maps” [https://rstudio.github.io/leaflet/]. Leaflet can create maps that can be used directly from the R console, RStudio, and R Markdown documents [https://cran.r-project.org/web/packages/leaflet/index.html]. The package has the capability to map polygons, GeoJSON; create map layers, add map labels, and many more.
library(leaflet)
Here, Leaflet is used to create a simple map plot for the COVID-19 Statistics of Eastern Visayas (Region VIII). To plot the borders of the region, one can manually enter the lengths of the border to create a polygon, or download a shapefile/GeoJSON/TopoJSON containing the specifications of the region’s border lengths. In our case, we used a GeoJSON file obtained from OpenStreetMap. The OSM Relation ID for Eastern Visayas is 3759193.
The codes used below were guided by the tutorials/guides from Earth Lab, an RStudio Pubs guide by Matt Dray, a guide by Jeremy Oakley and from the official RStudio guide by the creators of Leaflet.
labR8<-paste(sep="</br>",
"<b>Eastern Visayas COVID-19 Statistics:</b>",
"(As of May 13,2021)",
paste0("Confirmed Cases:",sum(Reg8$Confirmed)),
paste0("Active Cases:",NReg8$Active_total+Reg8$Confirmed[64]),
paste0("Deaths:",NReg8$Death_total),
paste0("Recovered:",NReg8$Recovered_total))
ntopo<-rgdal::readOGR("EVisayas.geojson")
OGR data source with driver: GeoJSON
Source: "E:\College Notes\GE-FEL DWIADS\Time Series\EVisayas.geojson", layer: "EVisayas"
with 1 features
It has 0 fields
leaflet(ntopo, width='100%')%>%
addTiles()%>%
addPolygons(stroke=TRUE,
smoothFactor = 0.3,
fillOpacity = 0.005)%>%
addPopups(lng=125,lat=11.24,labR8,
options=popupOptions(closeButton = FALSE,
attribution="From <a href='https://doh.gov.ph/covid19tracker'>DOH COVID-19 Tracker</a>",
closeOnClick = FALSE))